#' ---
#' title: "Coercion and Provocation"
#' author: "Allan Dafoe, Sophia Hatz, Baobao Zhang"
#' date: "July 22, 20206"
#' output: pdf_document
#' ---


# Start log
sink("cp_analysis_log.txt")

#' Date of 
{{format(Sys.Date(), format="%B %d %Y")}}

#' Load libraries
library(knitr)
library(MASS)
library(RItools)
library(sandwich)
library(lmtest)
library(ggplot2)
library(plyr)
library(nnet)
library(xtable)
library(stargazer)
library(car)
library(arm)
library(magrittr)
library(dplyr)
library(ri2)

#' ### Survey Experiment Analysis Code
#' **This R script** performs the analysis procedures described in the pre-analysis plan using clean test data. The test data was generated on Qualtrics and cleaned with the script "1_CP_Analysis.R". Analysis procedures in this script include:
#'
#' - hypothesis tests (primary specification)
#' - coefficient plots of key experimental results
#' - data quality tests 
#' - balance tests  

#' 
#' Clear environment, set seed.
rm(list = ls())
set.seed(1237658)

#' Set working directories
data_dir <- INSERT DATA FOLDER
images_dir <- FOLDER WHERE THE FIGURES ARE SAVED

#' Load data.
load(paste0(data_dir, "CP_data.RData"))
head(CP)

#' The variables in this dataset:
names(CP)

#' The sample size is
nrow(CP)

#' Function to make data frames for coefficient plots:
make.df <- function(var,comp,n,model,group){
  data.frame(
    var = as.character(var),
    comp = as.character(comp),
    n = n,
    coeff=model[2,1],
    se=model[2,2],
    ci90 = model[2,2]*(-qnorm((1-0.9)/2)), # 90% and 95% confidence intervals are calculated based on the normal distribution
    ci95 = model[2,2]*(-qnorm((1-0.95)/2)),
    pval = paste("p",ifelse(model[2,4] < 0.001,
                            "< 0.001",
                            paste("=", sprintf("%.3f", round(model[2,4], 3)), sep=" "))),
    group = as.character(group)) 
  #one-sided p-value
}

#' Function to estimate robust standard errors
coef_r <- function(lm_object, v_c_matrix = "HC1",
                   var = NULL, comp = NULL, group = NULL) {
  require("lmtest")
  require("sandwich")
  temp <- coeftest(x = lm_object, vcov. = vcovHC(lm_object, type = v_c_matrix))
  if (!is.null(var) & !is.null(var) & !is.null(group)) {
    return(make.df(var = var, comp = comp , n = length(lm_object$fitted.values),
                   model = temp, group = group))
  } else {
    return(temp)
  }
}

#' Function to combine plots
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  
  numPlots = length(plots)
  
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  
  if (numPlots==1) {
    print(plots[[1]])
    
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    
    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

#' Function to plot likert scale
plot.likert <- function (vec, possible.values = sort(unique(vec)), 
                         left = "linker Pol", right = "rechter Pol",
                         plot.median = F, plot.sd = T, plot.iqr = F, 
                         include.absolutes = T, include.percentages = T, 
                         own.margins = c(2, 2, 3, 2),
                         othermean = NULL, ...) {
  tab <- table(vec)
  if (length(tab) != length(possible.values)) {
    values.not.in.tab <- possible.values[!(possible.values %in% names(tab))]
    for (val in values.not.in.tab) {
      tab[as.character(val)] <- 0
    }
    tab <- tab[sort(names(tab))]
  }
  prop.tab <- prop.table(tab) * 100
  v.se <- sd(vec, na.rm = T)/sqrt(length(vec[!is.na(vec)]))
  v.m <- mean(vec, na.rm = T)
  v.med <- median(vec, na.rm = T)
  old.mar <- par("mar")
  par(mar = own.margins)
  # Setting-up plot region
  plot(x = c(min(as.numeric(names(tab)), na.rm = T) - 1.1, max(as.numeric(names(tab)), na.rm = T) + 1.1),
       y = c(0, 100), type = "n", xaxt = "n", yaxt = "n",
       xlab = "", ylab = "", bty = "n", ...)
  # Bars
  rect(xleft = as.numeric(names(prop.tab)) - .4,
       ybottom = 0,
       xright = as.numeric(names(prop.tab)) + .4,
       ytop = prop.tab,
       border = "#00000000", col = "#ADD8E6E6")
  # Lower black line
  lines(x = c(min(as.numeric(names(tab)), na.rm = T) - .6, max(as.numeric(names(tab)), na.rm = T) + .6),
        y = c(0, 0), col = "black", lwd = 2)
  # Upper black line
  lines(x = c(min(as.numeric(names(tab)), na.rm = T) - .6, max(as.numeric(names(tab)), na.rm = T) + .6),
        y = c(100, 100), col = "black", lwd = 2)
  # Group means lines
  for (n.i in names(tab)) {
    lines(x = c(n.i, n.i), y = c(0, 100), col = rgb(0,0,0,alpha=0.3))
  }
  # Lines at the sides
  #abline(v = min(as.numeric(names(tab)), na.rm = T) - .6, col = "black", lwd = 2)
  #abline(v = max(as.numeric(names(tab)), na.rm = T) + .6, col = "black", lwd = 2)
  lines(x = c(min(as.numeric(names(tab)), na.rm = T) - .6,
              min(as.numeric(names(tab)), na.rm = T) - .6),
        y = c(0, 100), col = "black", lwd = 2)
  lines(x = c(max(as.numeric(names(tab)), na.rm = T) + .6,
              max(as.numeric(names(tab)), na.rm = T) + .6),
        y = c(0, 100), col = "black", lwd = 2)
  # Grey rectangles at sides
  #   rect(xleft = min(as.numeric(names(tab)), na.rm = T) - 1.1,
  #        ybottom = 0,
  #        xright = min(as.numeric(names(tab)), na.rm = T) - .6,
  #        ytop = 100,
  #        border = "#00000000", col = "grey")
  #   rect(xleft = max(as.numeric(names(tab)), na.rm = T) + .6,
  #        ybottom = 0,
  #        xright = max(as.numeric(names(tab)), na.rm = T) + 1.1,
  #        ytop = 100,
  #        border = "#00000000", col = "grey")
  mtext(names(prop.tab), side = 1, at = names(prop.tab))
  # Percentages and numbers at the top
  if (include.percentages) mtext(paste(round(prop.tab, 0), "%"), side = 3, at = names(prop.tab), line = -.3, col = "blue")
  if (include.absolutes) mtext(tab, side = 3, at = names(tab), line = .5, cex = .8, font = 2)
  # Mean line
  points(x = c(v.m), y = c(90), pch=19, col = "black")
  # Median line
  if (plot.median) lines(x = c(v.med, v.med), y = c(90, 80), lwd = 4, col = "#00FF00AA")
  # Inter-Quartile range
  if (plot.iqr) {
    arrows(x0 = c(v.med, v.med), x1 = c(v.med - IQR(vec, na.rm = T), 
                                        v.med + IQR(vec, na.rm = T)),
           y0 = c(85, 85), y1 = c(85, 85),
           angle = 90, length = 0, lwd = 1)
  }
  # Other mean line
  if (!is.null(othermean)) lines(x = c(othermean, othermean), 
                                 y = c(85, 75), lwd = 6, col = "#00000099")
  # SD lines
  if (plot.sd) { arrows(x0 = c(v.m, v.m), 
                        x1 = c(v.m + qnorm(0.025)*v.se, v.m + qnorm(0.975)*v.se), 
                        y0 = c(90, 90), y1 = c(90, 90),
                        angle = 90, length = 0, lwd = 1) }
  # Left label
  mtext(left, side = 2, line = -.5)
  # Right label
  mtext(right, side = 4, line = -.5)
  par(mar = old.mar)
}

comp_levels <- c("Attack vs. Control", "Collision vs. Non-Collision",
                 "Accident vs. Control", "Attack vs. Accident", "Collision vs. Accident")


#' Change variables
#' mean impute CP$MagStake values where CP$RepStake is missing
CP$MagStake[is.na(CP$MagStake) & CP$RepStake == 1] <- 
  round(mean(CP$MagStake, na.rm = TRUE))
#' create new variable for reputation stakes that combines RepStake and MagStake
CP$RepStake_num <- NA
CP$RepStake_num[is.na(CP$MagStake)] <- 0
CP$RepStake_num[CP$MagStake == 0] <- 1
CP$RepStake_num[CP$MagStake == 1] <- 2
CP$RepStake_num[CP$MagStake == 2] <- 3
#' check it works
table(CP$RepStake_num, useNA = "always")
table(CP$RepStake, useNA = "always")
table(CP$MagStake, useNA = "always")
#' create variable that is Attack vs Accident
CP$AttackAccident <- NA
CP$AttackAccident[CP$Attack == 1] <- 1
CP$AttackAccident[CP$Accident == 1] <- 0
#' create variable that is Collision vs Accident
CP$CollisionAccident <- NA
CP$CollisionAccident[CP$Collision == 1] <- 1
CP$CollisionAccident[CP$Accident == 1] <- 0
#' standardize military intention variable
CP$MilInt_std <- CP$MilInt
CP$MilInt_std[which(CP$Incident.f %in% c("Attack", "Control"))] <-
  (CP$MilInt[which(CP$Incident.f %in% c("Attack", "Control"))]-
     mean(CP$MilInt[CP$Incident.f %in% c("Attack", "Control")]))/
  sd(CP$MilInt[CP$Incident.f %in% c("Attack", "Control")])
CP$MilInt_std[which(CP$Incident.f %in% c("Collision", "Non-Collision"))] <-
  (CP$MilInt[which(CP$Incident.f %in% c("Collision", "Non-Collision"))]-
     mean(CP$MilInt[CP$Incident.f %in% c("Collision", "Non-Collision")]))/
  sd(CP$MilInt[CP$Incident.f %in% c("Collision", "Non-Collision")])

#' #### Import .csv with table of analysis
a_t <- read.csv(file = paste0(data_dir, "analyze_table.csv"), stringsAsFactors=FALSE)
#not_var <- unique(a_t$formula1)[!unique(a_t$formula1) %in% names(CP)]
#a_t <- a_t[a_t$formula1 != not_var,]
a_t[a_t$group == "cost",]
# Fix the death count variable 
a_t$formula1[a_t$formula1 == "MatCosts_deaths"] <- "MatCosts"

#' Likert scale graphs
my_likert <- function(var_name, group_lab, left_lab, right_lab, 
                      possible.values, own.margins, width, height) {
  pdf(file = paste0(images_dir, 
                    var_name, group_lab, ".pdf"), width = width, height = height)
  plot.likert(vec = CP[,var_name][CP$Incident.f == group_lab], 
              left = left_lab, right = right_lab, own.margins = own.margins,
              possible.values = possible.values, main = group_lab)
  dev.off()
}

#' make the graphs for the Likert scales
#' main outcomes
#' Figure 13: Distribution of Responses: Levels of Escalation
lapply(unique(CP$Incident.f), my_likert, var = "LevelEsc", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:3, own.margins = c(2,0,4.5,0), width = 6, height = 2.5)
#' Figure 14: Distribution of Responses: Economic Costs
lapply(unique(CP$Incident.f), my_likert, var = "EcCosts", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:9, own.margins = c(2,0,4.5,0), width = 8, height = 3)
#' Figure 15: Distribution of Responses: Risk of War
lapply(unique(CP$Incident.f), my_likert, var = "RiskWar", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:6, own.margins = c(2,0,4.5,0), width = 6, height = 2.5)
#' Figure 16: Distribution of Responses: Military Deaths
lapply(unique(CP$Incident.f), my_likert, var = "MilDeaths", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:7, own.margins = c(2,0,4.5,0), width = 7, height = 3)
#' Figure 17: Distribution of Responses: Respondents' Considerations When Deciding to Maintain Claims - Avoid Risk of Conflict
lapply(unique(CP$Incident.f), my_likert, var = "ImpCosts", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:1, own.margins = c(2,0,4.5,0), width = 3, height = 3)
#' Figure 21: Distribution of Responses: Respondents' Considerations When Deciding to Maintain Claims - Honor US Pilot
lapply(unique(CP$Incident.f), my_likert, var = "ImpHonor", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:1, own.margins = c(2,0,4.5,0), width = 3, height = 3)
#' Figure 20: Distribution of Responses: Respondents' Considerations When Deciding to Maintain Claims - Maintaining US Reputation
lapply(unique(CP$Incident.f), my_likert, var = "ImpReputation", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:1, own.margins = c(2,0,4.5,0), width = 3, height = 3)
#' Figure 22: Distribution of Responses: Respondents' Considerations When Deciding to Maintain Claims - Punishing China
lapply(unique(CP$Incident.f), my_likert, var = "ImpRevenge", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:1, own.margins = c(2,0,4.5,0), width = 3, height = 3)
#' Figure 18: Distribution of Responses: Respondents' Considerations When Deciding to Maintain Claims - Preserve US Access to the E. China China
lapply(unique(CP$Incident.f), my_likert, var = "ImpStakes", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:1, own.margins = c(2,0,4.5,0), width = 3, height = 3)
#' Figure 19: Distribution of Responses: Respondents' Considerations When Deciding to Maintain Claims - Resist China's Threat to the US
lapply(unique(CP$Incident.f), my_likert, var = "ImpThreat", 
       left_lab = NULL,
       right_lab = NULL,
       possible.values = 0:1, own.margins = c(2,0,4.5,0), width = 3, height = 3)


#' Analyzing the data
res <- coef_r(lm_object = lm(formula(paste0(a_t$formula1[1], "~", a_t$formula2[1])), data=CP),
              var = a_t$var[1], comp = a_t$comp[1], group = a_t$group[1])
for (i in 2:nrow(a_t)) {
  temp <- coef_r(lm(formula(paste0(a_t$formula1[i], " ~ ", 
                                   a_t$formula2[i])), data=CP),
                 var = a_t$var[i], comp = a_t$comp[i], group = a_t$group[i])
  res <- rbind(res, temp)
}


#' Make a function to generate figures
make_plots <- function(res = res, group = "main", 
                       shape_val = 21:25,
                       color_val = rev(c("red", "darkgreen", "blue", "orange", "deeppink")),
                       ncol_num = 2, make_facets = TRUE,
                       comp_type = unique(res$comp), ggtitle = NULL) {
  temp <- ggplot2::ggplot(res[res$group %in% group & res$comp %in% comp_type,], 
                          aes(x=comp, y=coeff,color=comp,shape=comp)) +    
    geom_hline(yintercept = 0, colour = gray(0.5), lty = 2) +
    geom_linerange(aes(x = comp, ymin = coeff - ci90, ymax = coeff + ci90),
                   lwd = 1.5,size=5,position = position_dodge(0.5)) +
    geom_linerange(aes(x = comp, ymin = coeff - ci95, ymax = coeff + ci95),
                   lwd = 1,size=5,position = position_dodge(0.5)) +
    geom_point(fill = "white", size = 3) + 
    geom_text(aes(x=comp, y=coeff, label=paste0(pval, "; N = ",n)),
              hjust=0.5, vjust=-0.9,size=4,color="black") + #label coeffs with two-sided p-values
    scale_colour_manual(values = color_val) + 
    scale_shape_manual(values = shape_val) +
    xlab("Comparisons") + ylab("Estimated Treatment Effects") + 
    coord_flip() + 
    theme_bw() + ggtitle(ggtitle) + 
    theme(legend.position="none") +
    theme(panel.grid.minor.x= element_line(colour=gray(0.9)),
          panel.grid.major.x= element_line(colour=gray(0.9))) +
    theme(panel.grid.major.y = element_blank()) +
    theme(axis.ticks.y = element_blank()) +
    theme(axis.text.y = element_text(face="bold")) +
    theme(plot.title=element_text(face="bold")) +
    theme(strip.text.x = element_text(face = "bold"))
  if (make_facets) {
    temp + facet_wrap(~var, ncol = ncol_num, scales = "free_y")  
  } else {
    temp
  }
}

#' Make coefficient plots

# Set color and shapes
shape_val = 21:25
color_val = c("red", "darkgreen", "blue", "orange", "deeppink")


#' main results
res$comp <- factor(res$comp, levels = rev(unique(res$comp)))
res_main <- res[res$group == "main",]
res_main$var <- factor(as.character(res_main$var), levels =
                         unique(as.character(res_main$var))[c(2,1,3,4)])
# Horizontal graph: not included in the paper or appendix
make_plots(res = res_main, group = "main", ncol_num = 2, 
           color_val = c("blue", "darkgreen", "red"),
           shape_val = rep(21,3)) # main outcomes
ggsave(filename = paste0(images_dir, "main_outcomes_escalation_coef_plot.pdf"), width = 10, height = 4)
# Vertical graph of the main results
#' Figure 10: Main Outcomes: Escalation in the Dispute
make_plots(res = res_main, group = "main", ncol_num = 1, color_val = c("blue", "darkgreen", "red"),
           shape_val = rep(21,3)) # main outcomes
ggsave(filename = paste0(images_dir, "main_outcomes_escalation_coef_plot_vert.pdf"), width = 7.5, height = 7.5)

# Reverse labels
res_main$var_r <- factor(res_main$var, levels = rev(levels(res_main$var)))

# Function to generate the graphs
main_plots <- function(comparison, ylab, color, shape, file_name) {
  gg <- ggplot(res_main[res_main$comp == comparison,], aes(x = var_r, y = coeff, 
                                                           color = comp, shape=comp)) +
    geom_hline(yintercept = 0, colour = gray(0.5), lty = 2) +
    geom_linerange(aes(x = var_r, ymin = coeff - ci90, ymax = coeff + ci90),
                   lwd = 1.5, size=5) +
    geom_linerange(aes(x = var_r, ymin = coeff - ci95, ymax = coeff + ci95),
                   lwd = 1, size=5) +
    geom_point(fill = "white", size = 3) +
    xlab("Outcome Variables") + 
    ylab(ylab) + 
    geom_text(aes(x=var_r, y=coeff, ymax = coeff + ci95,label=paste0(pval)),
              hjust=0.5, vjust=-0.9,size=4,color="black") +
    coord_flip() + 
    theme_bw() +
    scale_colour_manual(values = color) + 
    scale_shape_manual(values = shape) +
    theme(legend.position="none") +
    theme(panel.grid.minor.x= element_line(colour=gray(0.9)),
          panel.grid.major.x= element_line(colour=gray(0.9))) +
    theme(panel.grid.major.y = element_blank()) +
    theme(axis.ticks.y = element_blank()) +
    theme(axis.text.y = element_text(face="bold")) +
    theme(plot.title=element_text(face="bold")) +
    theme(strip.text.x = element_text(face = "bold"))
  print(gg)
  ggsave(filename = paste0(images_dir, file_name, ".pdf"), width = 7, height = 3.5)
}

# Make new graphs
# Attack vs. Control
# Figure 1: Attack vs. Control: Differences in Outcomes
main_plots(comparison = "Attack vs. Control", 
           ylab = "Estimated Treatment Effects (Attack - Control); N = 705", 
           color = "red", shape = 21, file_name = "main_outcomes_escalation_attack_control")
# Collision vs. Non-Collision 
# Figure 3: Collision vs. Non-Collision: Differences in Outcomes. 
main_plots(comparison = "Collision vs. Non-Collision", 
           ylab = "Estimated Treatment Effects (Collision - Non-Collision); N = 731", 
           color = "darkgreen", shape = 21, file_name = "main_outcomes_escalation_collision_non_collision")
# Accident vs. Control
# Figure 4: Accident vs. Control: Differences in Outcomes
main_plots(comparison = "Accident vs. Control", 
           ylab = "Estimated Treatment Effects (Accident - Control); N = 678", 
           color = "blue", shape = 21, file_name = "main_outcomes_escalation_accident_control")

#' placebo tests
#' Figure 2: Placebo Outcomes.
make_plots(res = res, group = "placebo", ncol_num = 1, 
           comp_type = c("Attack vs. Control", "Collision vs. Non-Collision"), 
           color_val = c("darkgreen", "red"), shape_val = rep(21, 2)) # placebo 
ggsave(filename = paste0(images_dir, "placebo_coef_plot.pdf"), width = 8, height = 3.5)

#' reputation stakes: not in the paper or appendix
make_plots(res = res, group = "reputation", color_val = c("blue", "darkgreen", "red"), ncol_num = 1, shape_val = rep(21, 3)) # reputation outcomes
ggsave(filename = paste0(images_dir, "reputation_outcome_coef_plot.pdf"), width = 8, height = 4)
#' reputation stakes combined: not in the paper or appendix
make_plots(res = res, group = "reputation_combo", color_val = c("blue", "darkgreen", "red"), ncol_num = 1, shape_val = rep(21, 3)) # reputation outcomes
ggsave(filename = paste0(images_dir, "reputation_outcome_coef_plot_combined.pdf"), width = 8, height = 3)



#' Additional analysis

# Randomization inference p-values
N <- nrow(CP)

# Make the assignments for comparing against Control
CP$Z <- NA
CP$Z[CP$Incident.f == "Control"] <- 1
CP$Z[CP$Incident.f == "Attack"] <- 2
CP$Z[CP$Incident.f == "Accident"] <- 3
CP$Z[CP$Incident.f == "Collision"] <- 4
CP$Z[CP$Incident.f == "Non-Collision"] <- 5
CP$Z <- factor(paste0("T", CP$Z), levels = paste0("T", 1:5))
# Declare the design
declaration <- declare_ra(N = N, num_arms = 5, simple = TRUE, conditions = paste0("T", 1:5))
dat <- data.frame(CP[,c("Z", unique(a_t$formula1[a_t$group == "importance considerations"]))])

# Functions for differences-in-means
mean_attack_control <- function(data) {
  with(data, mean(Y[Z == "T2"]) - mean(Y[Z == "T1"]))
}
mean_col_non <- function(data) {
  with(data, mean(Y[Z == "T4"]) - mean(Y[Z == "T5"]))
}
mean_accident_control <- function(data) {
  with(data, mean(Y[Z == "T3"]) - mean(Y[Z == "T1"]))
}
mean_attack_accident <- function(data) {
  with(data, mean(Y[Z == "T2"]) - mean(Y[Z == "T3"]))
}
mean_col_accident <- function(data) {
  with(data, mean(Y[Z == "T4"]) - mean(Y[Z == "T3"]))
}

# Functions for difference-in-variance
var_attack_control <- function(data) {
  with(data, var(Y[Z == "T2"]) - var(Y[Z == "T1"]))
}
var_col_non <- function(data) {
  with(data, var(Y[Z == "T4"]) - var(Y[Z == "T5"]))
}
var_accident_control <- function(data) {
  with(data, var(Y[Z == "T3"]) - var(Y[Z == "T1"]))
}
var_attack_accident <- function(data) {
  with(data, var(Y[Z == "T2"]) - var(Y[Z == "T3"]))
}
var_col_accident <- function(data) {
  with(data, var(Y[Z == "T4"]) - var(Y[Z == "T3"]))
}

ri_func <- function(outcome, test_function, nsims = 10000, 
                    outcome_name, comparison, test_type, ptype = "two-tailed") {
  dat$Y <- dat[,outcome]
  dat$Y[is.na(dat$Y)] <- mean(dat$Y, na.rm = TRUE)
  ri_out <- conduct_ri(
    test_function = test_function,
    declaration = declaration,
    sharp_hypothesis = 0, p = ptype,
    data = dat, sims = nsims, progress_bar = TRUE
  )
  data.frame(outcome_name, comparison, test_type, summary(ri_out))
}

# Get the variable labels for the importance considerations
a_t_mec <- a_t[a_t$group == "importance considerations",]

#The randomization inference take a very long time to run
# General function to run the ri
ri_summary <- function(outcome, test_function, test_type, comparison) {
  ri_func(outcome = outcome, test_function = test_function,
          outcome_name = outcome,
          comparison = comparison, test_type = test_type)
}
# Attack vs. Control
attack_control_mean <- lapply(unique(a_t_mec$formula1)[1:5], ri_summary,
                              test_function = mean_attack_control, test_type = "Mean",
                              comparison = "Attack vs. Control")
attack_control_var <- lapply(unique(a_t_mec$formula1)[1:5], ri_summary,
                             test_function = var_attack_control, test_type = "Variance",
                             comparison = "Attack vs. Control")

# Combine the results
ri_attack_control <- rbind(do.call(rbind, attack_control_mean),
                           do.call(rbind, attack_control_var))

# Collision vs. Non-Collision
col_non_mean <- lapply(unique(a_t_mec$formula1)[1:5], ri_summary,
                       test_function = mean_col_non, test_type = "Mean",
                       comparison = "Collision vs. Non-Collision")
col_non_var <- lapply(unique(a_t_mec$formula1)[1:5], ri_summary,
                      test_function = var_col_non, test_type = "Variance",
                      comparison = "Collision vs. Non-Collision")
# Combine the results
ri_col_non <- rbind(do.call(rbind, col_non_mean),
                    do.call(rbind, col_non_var))

# Accident vs. Control
accident_control_mean <- lapply(unique(a_t_mec$formula1)[1:5], ri_summary,
                                test_function = mean_accident_control, test_type = "Mean",
                                comparison = "Accident vs. Control")
accident_control_var <- lapply(unique(a_t_mec$formula1)[1:5], ri_summary,
                               test_function = var_accident_control, test_type = "Variance",
                               comparison = "Accident vs. Control")
# Combine the results
ri_accident_control <- rbind(do.call(rbind, accident_control_mean),
                             do.call(rbind, accident_control_var))

# Honor questions
# Attack vs. Accident
# fix the missing data problem
dat$ImpHonor[dat$ImpHonor == -99 | is.na(dat$ImpHonor)] <-
  mean(dat$ImpHonor[dat$ImpHonor != -99 & !is.na(dat$ImpHonor)])
# run the ri function
ri_honor <- rbind(ri_summary(outcome = "ImpHonor", test_function = mean_attack_accident,
                             test_type = "Mean", comparison = "Attack vs. Accident"),
                  ri_summary(outcome = "ImpHonor", test_function = mean_col_accident,
                             test_type = "Mean", comparison = "Collision vs. Accident"),
                  ri_summary(outcome = "ImpHonor", test_function = var_attack_accident,
                             test_type = "Variance", comparison = "Attack vs. Accident"),
                  ri_summary(outcome = "ImpHonor", test_function = var_col_accident,
                             test_type = "Variance", comparison = "Collision vs. Accident"))

# Merge with the exisiting results
ri_res <- rbind(ri_attack_control, ri_col_non, ri_accident_control, ri_honor)
save(ri_res, file = paste0(images_dir, "ri_results.RData"))
var_labels <- unique(a_t_mec[,c("formula1", "var")])
names(var_labels)[1] <- "outcome_name"
ri_res <- merge(x = ri_res, y = var_labels, all.x = TRUE)
ri_res$var <- factor(ri_res$var, levels = var_labels$var)
ri_res$test_type <- ifelse(ri_res$test_type == "Mean", "Differences-in-Means",
                           "Differences-in-Variances")
ri_res$comparison <- factor(ri_res$comparison, levels = rev(unique(ri_res$comparison)))

# Make the graph
ggplot(ri_res, aes(x = comparison, y = two_tailed_p_value, shape = test_type,
                   color = comparison)) +
  geom_hline(yintercept = 0.05, alpha = 0.4, linetype = 2) +
  geom_hline(yintercept = 0.01, alpha = 0.4, linetype = 3) +
  geom_point() + coord_flip() + facet_wrap(~var, ncol = 1, scales = "free_y") + theme_bw() +
  ylab("Two-tailed p-values") + xlab("Comparisons") +
  scale_shape_manual(name = "Test statistics", values = c(2, 3)) +
  scale_color_manual(values = rev(color_val), guide = FALSE) +
  theme(legend.position="bottom") +
  theme(axis.ticks.y = element_blank()) +
  theme(axis.text.y = element_text(face="bold")) +
  theme(plot.title=element_text(face="bold")) +
  theme(strip.text.x = element_text(face = "bold"))
ggsave(filename = paste0(images_dir, "ruling_out.pdf"), width = 8, height = 6)

# Importance considerations
ri_res_m <- ri_res[ri_res$test_type == "Differences-in-Means",
                   c("var", "comparison", "estimate", "two_tailed_p_value")]
ri_res_v <- ri_res[ri_res$test_type == "Differences-in-Variances",
                   c("var", "comparison", "estimate", "two_tailed_p_value")]
# Merge in the p-values from randomization inference
names(ri_res_m) <- c("var", "comp", "ri_est_m", "ri_p_m")
names(ri_res_v) <- c("var", "comp", "ri_est_v", "ri_p_v")
res_impc <- merge(x = res[res$group == "importance considerations",],
                  y = ri_res_m, all.x = TRUE)
res_impc <- merge(x = res_impc,
                  y = ri_res_v, all.x = TRUE)
res_impc$pval_ols <- res_impc$pval
res_impc$pval <- ifelse(res_impc$ri_p_m < 0.001, paste0("p < 0.001"),
                        paste0("p = ", sprintf("%.3f", round(res_impc$ri_p_m, 3))))
res_impc$m_pval <- ifelse(res_impc$ri_p_m < 0.001, paste0("< 0.001"),
                          sprintf("%.3f", round(res_impc$ri_p_m, 3)))
res_impc$v_pval <- ifelse(res_impc$ri_p_v < 0.001, paste0("< 0.001"),
                          sprintf("%.3f", round(res_impc$ri_p_v, 3)))
res_impc$comp <- factor(res_impc$comp, levels = rev(comp_levels))
res_impc$var <- factor(res_impc$var, levels = c("Consideration: Avoid Risk of Conflict",
                                                "Consideration: Maintain US Reputation",
                                                "Consideration: Punishing China",
                                                "Consideration: Preserve US Access to E. China Sea",
                                                "Consideration: Resist China's Threat to US",
                                                "Consideration: Honor US Pilot"))
# make the graph with ri p-values
make_plots(res = res_impc, 
           group = "importance considerations", ncol_num = 1, shape_val = rep(21, 5))
ggsave(filename = paste0(images_dir, "importance_considerations.pdf"), width = 8, height = 10)

# Clean up before output to LaTeX
res_impc$c_var <- gsub(pattern = "Consideration: ", replacement = "", res_impc$var)

# Output to LaTeX
# Difference in Means
#' Table 7: Respondents' Considerationos When Deciding to Maintain Claims (Difference in Means)
res_impc <- res_impc[order(res_impc$var),]
stargazer(res_impc[,c("c_var", "comp", "ri_est_m", "m_pval")],
          summary = FALSE, digits = 3, rownames = FALSE)
# Difference in Variances
#' Table 8: Respondents' Considerationos When Deciding to Maintain Claims (Difference in Variances)
stargazer(res_impc[,c("c_var", "comp", "ri_est_v", "v_pval")],
          summary = FALSE, digits = 3, rownames = FALSE)


#' All three reputation stakes measures
# Randomization inference
dat <- data.frame(CP[,c("Z", unique(a_t$formula1[a_t$group %in% 
                                                   c("reputation", "reputation_combo")]))])
a_t_rep <- a_t[a_t$group %in% c("reputation", "reputation_combo"),]

# Attack vs. Control
attack_control_mean <- lapply(unique(a_t_rep$formula1)[1:3], ri_summary, 
                              test_function = mean_attack_control, test_type = "Mean", 
                              comparison = "Attack vs. Control")
attack_control_var <- lapply(unique(a_t_rep$formula1)[1:3], ri_summary, 
                             test_function = var_attack_control, test_type = "Variance",
                             comparison = "Attack vs. Control")


# Combine the results
ri_attack_control <- rbind(do.call(rbind, attack_control_mean), 
                           do.call(rbind, attack_control_var))

# Collision vs. Non-Collision
col_non_mean <- lapply(unique(a_t_rep$formula1)[1:3], ri_summary, 
                       test_function = mean_col_non, test_type = "Mean",
                       comparison = "Collision vs. Non-Collision")
col_non_var <- lapply(unique(a_t_rep$formula1)[1:3], ri_summary, 
                      test_function = var_col_non, test_type = "Variance",
                      comparison = "Collision vs. Non-Collision")
# Combine the results
ri_col_non <- rbind(do.call(rbind, col_non_mean), 
                    do.call(rbind, col_non_var))

# Accident vs. Control
accident_control_mean <- lapply(unique(a_t_rep$formula1)[1:3], ri_summary, 
                                test_function = mean_accident_control, test_type = "Mean",
                                comparison = "Accident vs. Control")
accident_control_var <- lapply(unique(a_t_rep$formula1)[1:3], ri_summary, 
                               test_function = var_accident_control, test_type = "Variance",
                               comparison = "Accident vs. Control")
# Combine the results
ri_accident_control <- rbind(do.call(rbind, accident_control_mean), 
                             do.call(rbind, accident_control_var))

rep_ri <- rbind(ri_attack_control, ri_col_non, ri_accident_control)
save(rep_ri, file = paste0(images_dir, "ri_results_rep.RData"))

var_labels <- unique(a_t_rep[,c("formula1", "var")])
names(var_labels)[1] <- "outcome_name"
rep_ri <- merge(x = rep_ri, y = var_labels, all.x = TRUE)
rep_ri$var <- factor(rep_ri$var, levels = var_labels$var)
rep_ri$test_type <- ifelse(rep_ri$test_type == "Mean", "Differences-in-Means",
                           "Differences-in-Variances")
rep_ri$comparison <- factor(rep_ri$comparison, levels = rev(unique(rep_ri$comparison)))

rep_ri_m <- rep_ri[rep_ri$test_type == "Differences-in-Means", 
                   c("var", "comparison", "estimate", "two_tailed_p_value")]
rep_ri_v <- rep_ri[rep_ri$test_type == "Differences-in-Variances",
                   c("var", "comparison", "estimate", "two_tailed_p_value")]

# merge in the p-values from randomization inference
names(rep_ri_m) <- c("var", "comp", "ri_est_m", "ri_p_m")
names(rep_ri_v) <- c("var", "comp", "ri_est_v", "ri_p_v")

res_rep <- merge(x = res[res$group %in% c("reputation", "reputation_combo"),], 
                 y = rep_ri_m, all.x = TRUE)
res_rep <- merge(x = res_rep, 
                 y = rep_ri_v, all.x = TRUE)
# Clean up the results data
res_rep$pval_ols <- res_rep$pval
res_rep$pval <- ifelse(res_rep$ri_p_m < 0.001, paste0("p < 0.001"),
                       paste0("p = ", sprintf("%.3f", round(res_rep$ri_p_m, 3))))
res_rep$m_pval <- ifelse(res_rep$ri_p_m < 0.001, paste0("< 0.001"),
                         sprintf("%.3f", round(res_rep$ri_p_m, 3)))
res_rep$v_pval <- ifelse(res_rep$ri_p_v < 0.001, paste0("< 0.001"),
                         sprintf("%.3f", round(res_rep$ri_p_v, 3)))
res_rep$comp <- factor(res_rep$comp, levels = rev(comp_levels))
# clean up the facets
res_rep$var <- factor(res_rep$var, levels = c("US Reputation at Stake",
                                              "US Reputation at Stake (Combined Measure)",
                                              "Those Who Indicated Reputation Is at Stake: Magnitude of Reputation Stakes"))
#' Figure 6: Reputation at Stake
make_plots(res = res_rep, group = c("reputation", "reputation_combo"), color_val = c("blue", "darkgreen", "red"), ncol_num = 1, rep(21, 3)) # reputation outcomes
ggsave(filename = paste0(images_dir, "reputation_outcome_coef_plot_all_3.pdf"), width = 8, height = 5.5)

# Output to LaTeX
# Difference in Means
res_rep <- res_rep[order(res_rep$var),]
#' Table 5: Reputation at Stake
stargazer(res_rep[,c("var", "comp", "ri_est_m", "m_pval")],
          summary = FALSE, digits = 3, rownames = FALSE)
#' Table 6: Reputation at Stake
stargazer(res_rep[,c("var", "comp", "ri_est_v", "v_pval")],
          summary = FALSE, digits = 3, rownames = FALSE)

#' China's behavior
#' Figure 9: Additional Outcomes
make_plots(res = res, group = c("cost", "behavior"), ncol_num = 1, color_val = c("blue", "darkgreen", "red"),
           shape_val = rep(21, 5))
ggsave(filename = paste0(images_dir, "implicit_mediation.pdf"), width = 8, height = 6)

#' Density plots of the 4 main outcome measures
main_vars <- unique(a_t$formula1[a_t$group == "main"])
den_dat <- rbind(data.frame(var = "Levels of Escalation", y = CP$LevelEsc,
                            comparison = CP$Incident.f,
                            AttackControl = CP$AttackControl, 
                            CollisionNonCollis = CP$CollisionNonCollis,
                            AccidentControl = CP$AccidentControl),
                 data.frame(var = "Economic Costs", y = CP$EcCosts,
                            comparison = CP$Incident.f,
                            AttackControl = CP$AttackControl, 
                            CollisionNonCollis = CP$CollisionNonCollis,
                            AccidentControl = CP$AccidentControl),
                 data.frame(var = "Risk of War", y = CP$RiskWar,
                            comparison = CP$Incident.f,
                            AttackControl = CP$AttackControl, 
                            CollisionNonCollis = CP$CollisionNonCollis,
                            AccidentControl = CP$AccidentControl),
                 data.frame(var = "Military Deaths", y = CP$MilDeaths,
                            comparison = CP$Incident.f,
                            AttackControl = CP$AttackControl, 
                            CollisionNonCollis = CP$CollisionNonCollis,
                            AccidentControl = CP$AccidentControl))
den_dat$AttackControl <- ifelse(den_dat$AttackControl == 0, "Control", 
                                "Attack")
den_dat$CollisionNonCollis <- 
  ifelse(den_dat$CollisionNonCollis == 0, "Non-Collision", "Collision")
den_dat$AccidentControl <- 
  ifelse(den_dat$AccidentControl == 0, "Control", "Accident")
mean_group <- den_dat %>% group_by(comparison, var) %>% dplyr::summarise(
  mean_group = mean(y)
)
den_dat <- merge(x = den_dat, y = mean_group, all.x = TRUE)

#' Attack vs. Control
ggplot(data = den_dat[!is.na(den_dat$AttackControl),], 
       mapping = aes(x = y, fill = AttackControl)) + 
  geom_density(alpha = 0.3, adjust = 2.5) +
  geom_vline(mapping = aes(xintercept = mean_group, color = AttackControl),
             linetype = "longdash") + xlab("Response") + ylab("Density") +
  scale_color_manual(values = c("red", "blue"), name = "Comparison") +
  scale_fill_manual(values = c("red", "blue"), name = "Comparison") +
  theme_bw() + facet_wrap(~var, ncol = 2, scales = "free") +
  ggtitle("Distribution of Responses: Attack vs. Control") +
  theme(legend.position = "bottom")
# not included in the paper or appendix
ggsave(filename = paste0(images_dir, "density_attack_control.pdf"), width = 8, height = 5)
#' Collision vs. Non-Collision
ggplot(data = den_dat[!is.na(den_dat$CollisionNonCollis),], 
       mapping = aes(x = y, fill = CollisionNonCollis)) + 
  geom_density(alpha = 0.3, adjust = 2.5) +
  geom_vline(mapping = aes(xintercept = mean_group, color = CollisionNonCollis),
             linetype = "longdash") + xlab("Response") + ylab("Density") +
  scale_color_manual(values = c("darkgreen", "brown"), name = "Comparison") +
  scale_fill_manual(values = c("darkgreen", "brown"), name = "Comparison") +
  theme_bw() + facet_wrap(~var, ncol = 2, scales = "free") +
  ggtitle("Distribution of Responses: Collision vs. Non-Collision") +
  theme(legend.position = "bottom")
# not included in the paper or appendix
ggsave(filename = paste0(images_dir, "density_collision_non-collision.pdf"), width = 8, height = 5)
#' Accident vs. Control
ggplot(data = den_dat[!is.na(den_dat$AccidentControl),], 
       mapping = aes(x = y, fill = AccidentControl)) + 
  geom_density(alpha = 0.3, adjust = 2.5) +
  geom_vline(mapping = aes(xintercept = mean_group, color = AccidentControl),
             linetype = "longdash") + xlab("Response") + ylab("Density") +
  scale_color_manual(values = c("darkorange", "blue"), name = "Comparison") +
  scale_fill_manual(values = c("darkorange", "blue"), name = "Comparison") +
  theme_bw() + facet_wrap(~var, ncol = 2, scales = "free") +
  ggtitle("Distribution of Responses: Accident vs. Control") +
  theme(legend.position = "bottom")
# not included in the paper or appendix
ggsave(filename = paste0(images_dir, "density_accident_control.pdf"), width = 8, height = 5)

#' Reputation Types Bar Chart
CP$AttackControl_l <- ifelse(CP$AttackControl == 0, "Control", 
                             "Attack")
CP$CollisionNonCollis_l <- 
  ifelse(CP$CollisionNonCollis == 0, "Non-Collision", "Collision")
CP$AccidentControl_l <- 
  ifelse(CP$AccidentControl == 0, "Control", "Accident")
imp <- CP[CP$RepStake.q != "",] %>% group_by(RepStake.q, Incident.f) %>% 
  dplyr::summarise(rep_stake_yes = mean(RepStake),
                   rep_stake_se = sd(RepStake)/sqrt((length(RepStake)-1)))
imp_n <- CP[CP$RepStake.q != "",] %>% group_by(RepStake.q) %>% 
  dplyr::summarise(rep_stake_n = length(RepStake))
rep_stake_lab <- paste0(c("Credibility", "Honor", "Image Abroad",
                          "International Status", "National Honor",
                          "Prestige", "International Reputation"),
                        " (N=", imp_n$rep_stake_n, ")")
for (i in 1:length(unique(imp$RepStake.q))) {
  imp$RepStake.q[imp$RepStake.q == unique(imp$RepStake.q)[i]] <- 
    rep_stake_lab[i]
}
imp$RepStake.q <- factor(imp$RepStake.q, 
                         levels = unique(imp$RepStake.q)[c(6,7,3,1,2,5,4)])
imp$Incident.f <- factor(imp$Incident.f,
                         levels = c("Attack", "Control", "Collision",
                                    "Non-Collision", "Accident"))

#' Figure 7: Types of Reputation at Stake.
ggplot(data = imp) +
  geom_bar(aes(x = Incident.f, y = rep_stake_yes,
               fill = Incident.f),
           stat = "identity", position = "dodge", color = NA) + 
  geom_errorbar(aes(x = Incident.f, ymin = qnorm(0.025)*rep_stake_se+rep_stake_yes, 
                    ymax = qnorm(0.975)*rep_stake_se+rep_stake_yes), width=.2) +
  theme_bw() + ylim(0, 1) +
  scale_fill_manual(values = c("red","brown4","darkgreen", "purple", "blue"),
                    name = "Treatment Assignment") +
  facet_wrap(~RepStake.q) + xlab("") +
  theme(legend.position = c(0.6, 0.2), axis.ticks = element_blank(), 
        axis.text.x = element_blank()) + 
  guides(fill = guide_legend(nrow=2,byrow=TRUE)) +
  ylab("Proportion Who Indicated Reputation Is At Stake") 
ggsave(filename = paste0(images_dir, "barchart_reputation_at_stake.pdf"), width = 8, height = 6)

#' Importance of considerations
#' mean impute 
CP$ImpCosts[is.na(CP$ImpCosts)] <- round(mean(CP$ImpCosts, na.rm = TRUE))
CP$ImpStakes[is.na(CP$ImpStakes)] <- round(mean(CP$ImpStakes, na.rm = TRUE))
CP$ImpThreat[is.na(CP$ImpThreat)] <- round(mean(CP$ImpThreat, na.rm = TRUE))
CP$ImpReputation[is.na(CP$ImpReputation)] <- round(mean(CP$ImpReputation, na.rm = TRUE))
CP$ImpHonor[is.na(CP$ImpHonor)] <- round(mean(CP$ImpHonor[CP$ImpHonor != -99], na.rm = TRUE))
CP$ImpHonor[CP$ImpHonor == -99] <- NA
CP$ImpRevenge[is.na(CP$ImpRevenge)] <- round(mean(CP$ImpRevenge, na.rm = TRUE))
CP$Coercion[is.na(CP$Coercion)] <- round(mean(CP$Coercion, na.rm = TRUE))
imp_dat_raw <- CP %>% group_by(Incident.f) %>% dplyr::summarise(
  m_Coercion_coef = mean(Coercion), 
  m_Coercion_se = sd(Coercion)/sqrt((length(Coercion)-1)),
  m_ImpCosts_coef = mean(ImpCosts),
  m_ImpCosts_se = sd(ImpCosts)/sqrt((length(ImpCosts)-1)),
  m_ImpStakes_coef = mean(ImpStakes),
  m_ImpStakes_se = sd(ImpStakes)/sqrt((length(ImpStakes)-1)),
  m_ImpThreat_coef = mean(ImpThreat),
  m_ImpThreat_se = sd(ImpThreat)/sqrt((length(ImpThreat)-1)),
  m_ImpReputation_coef = mean(ImpReputation),
  m_ImpReputation_se = sd(ImpReputation)/sqrt((length(ImpReputation)-1)),
  m_ImpHonor_coef = mean(ImpHonor),
  m_ImpHonor_se = sd(ImpHonor)/sqrt((length(ImpHonor)-1)),
  m_ImpRevenge_coef = mean(ImpRevenge),
  m_ImpRevenge_se = sd(ImpRevenge)/sqrt((length(ImpRevenge)-1))
) 
imp_dat <- imp_dat_raw[,c(1, grep("_coef", names(imp_dat_raw)))] %>% 
  reshape2::melt(value.name = "coef") %>% as.data.frame()
imp_dat$variable <- as.character(imp_dat$variable)
imp_labels <- c("China's Level of Coercion", "Avoid Risk of Conflict", 
                "Preserve US Access to E. China Sea",
                "Resist China's Threat to US", "Maintain US Reputation",
                "Honor US Pilot", "Punishing China")
for (i in 1:length(imp_labels)) {
  imp_dat$variable[imp_dat$variable == unique(imp_dat$variable)[i]] <- 
    imp_labels[i]
}
# add in the standard errors
imp_se <- imp_dat_raw[,c(1, grep("_se", names(imp_dat_raw)))] %>% 
  reshape2::melt(value.name = "se") %>% as.data.frame()
imp_dat$se <- imp_se$se
imp_dat$variable <- factor(imp_dat$variable, levels = imp_labels)
imp_dat$Incident.f <- factor(imp_dat$Incident.f,
                             levels = c("Attack", "Control", "Collision",
                                        "Non-Collision", "Accident"))
#' Figure 8: Respondents’ Considerations When Deciding to Maintain Claims.
ggplot(data = imp_dat[imp_dat$variable != "China's Level of Coercion",]) +
  geom_bar(mapping = aes(x = Incident.f, y = coef,
                         fill = Incident.f),
           stat = "identity", position = "dodge", color = NA) + 
  theme_bw() + ylim(0, 3) + 
  geom_errorbar(mapping = aes(x = Incident.f, 
                              ymin = qnorm(0.025)*se + coef,
                              ymax = qnorm(0.975)*se + coef), width = 0.2) +
  scale_fill_manual(values = c("red", "brown4", "darkgreen", "purple", "blue"),
                    name = "Treatment Assignment") +
  facet_wrap(~variable) + xlab("") +
  theme(legend.position = "bottom", axis.ticks = element_blank(), 
        axis.text.x = element_blank()) + 
  ylab("Level of Importance")
ggsave(filename = paste0(images_dir, "barchart_importance_considerations.pdf"), width = 8, height = 6)

#' China's Level of Coercion
ggplot(data = imp_dat[imp_dat$variable == "China's Level of Coercion",], 
       mapping = aes(x = Incident.f, y = coef,
                     fill = Incident.f)) +
  geom_bar(stat = "identity", position = "dodge", color = NA) + 
  theme_bw() + ylim(0, 3) +
  scale_fill_manual(values = c("red", "darkgreen", "brown4", "purple", "blue"),
                    name = "Treatment Assignment") + xlab("") +
  theme(legend.position = "bottom", axis.ticks = element_blank(), 
        axis.text.x = element_blank()) + 
  guides(fill = guide_legend(nrow=2,byrow=TRUE)) +
  ylab("Level of Coercion") #+
ggtitle("Respondents' Perception of China's Level of Coercion")
# Not included in the paper or appendix
ggsave(filename = paste0(images_dir, "barchart_coercion.pdf"), width = 5.5, height = 4)


#' #### Balance Tests

# clean the data
CP$Gender[is.na(CP$Gender) | CP$Gender == -98] <- 
  round(mean(CP$Gender[CP$Gender > 0], na.rm = TRUE))
CP$Age[CP$Age == 1982] <- 2016-1982
CP$Age[CP$Age == 0.39] <- 39
CP$Age[is.na(CP$Age) | CP$Age == -98] <- 
  round(mean(CP$Age[CP$Age > 0], na.rm = TRUE))
CP$Education[is.na(CP$Education) | CP$Education == -98] <- 
  round(mean(CP$Education[CP$Education > 0], na.rm = TRUE))
CP$Income[is.na(CP$Income) | CP$Income == -98] <- 
  round(mean(CP$Income[CP$Income > 0], na.rm = TRUE))
CP$Pol7scale[is.na(CP$Pol7scale) | CP$Pol7scale == -98] <- 
  round(mean(CP$Pol7scale[CP$Pol7scale > 0], na.rm = TRUE))
CP$Pol7scale[CP$Pol7scale == 8] <- 4
CP$PolParty[is.na(CP$PolParty) | CP$PolParty == -98] <- 
  round(mean(CP$PolParty[CP$PolParty > 0], na.rm = TRUE))

# construct the variables used
CP$male <- ifelse(CP$Gender == 1, TRUE, FALSE)
CP$college <- ifelse(CP$Education >= 5, TRUE, FALSE)
CP$white <- ifelse(CP$Race == 1, TRUE, FALSE)
CP$white[is.na(CP$white)] <- FALSE
CP$income50k <- ifelse(CP$Income >= 6, TRUE, FALSE)
CP$democrat <- ifelse(CP$PolParty == 2, TRUE, FALSE)

#' Table 3: Summary Statistics of Respondents' Background Characteristics 
bal_table <- CP %>% group_by(Incident.f) %>% dplyr::summarise(
  age = round(mean(Age), 2),
  male = round(mean(male), 2),
  college = round(mean(college), 2),
  white = round(mean(white), 2),
  income50k = round(mean(income50k), 2),
  pid = round(mean(Pol7scale), 2),
  democrat = round(mean(democrat), 2)
) %>% as.data.frame() %>% t()
rownames(bal_table) <- c("Variables", "Mean age",
                         "Prop. male", "Prop. with college degree",
                         "Prop. white", "Prop. income", "Mean PID 7",
                         "Prop. Democrat")

stargazer(bal_table, summary = FALSE)
table(CP$Incident.f)

#' We test for balance on background variables across treatment groups in each comparison. 
balance.comp1 <- coef_r(lm(AccidentControl == "Accident" ~ scale(Age) + scale(male) + scale(college) + scale(white) + scale(income50k) + scale(Pol7scale) + scale(democrat), data=CP))
balance.comp2 <- coef_r(lm(CollisionNonCollis == "Collision" ~ scale(Age) + scale(male) + scale(college) + scale(white) + scale(income50k) + scale(Pol7scale) + scale(democrat), data=CP))
balance.comp3 <- coef_r(lm(AttackControl == "Attack" ~ scale(Age) + scale(male) + scale(college) + scale(white) + scale(income50k) + scale(Pol7scale) + scale(democrat), data=CP))

stargazer(balance.comp1,balance.comp2,balance.comp3,
          title="Balance on Background Characteristics",
          font.size="footnotesize", keep.stat=c("n", "F"),
          style = "qje", model.numbers = FALSE)

# get F statistics
balance.comp1 <- lm(AccidentControl == "Accident" ~ scale(Age) + scale(male) + scale(college) + scale(white) + scale(income50k) + scale(Pol7scale) + scale(democrat), data=CP)
balance.comp2 <- lm(CollisionNonCollis == "Collision" ~ scale(Age) + scale(male) + scale(college) + scale(white) + scale(income50k) + scale(Pol7scale) + scale(democrat), data=CP)
balance.comp3 <- lm(AttackControl == "Attack" ~ scale(Age) + scale(male) + scale(college) + scale(white) + scale(income50k) + scale(Pol7scale) + scale(democrat), data=CP)

#' Table 4: Balance on Background Characteristics
stargazer(balance.comp1,balance.comp2,balance.comp3,
          title="Balance on Background Characteristics",
          font.size="footnotesize", keep.stat=c("n", "F"),
          style = "qje", model.numbers = FALSE)

#####################################
# Additional analysis

#' Heterogenous treatment effect

# Function for extracting estimates of heterogenous treatment effects
a_main <- a_t[a_t$group == "main",]
subgroup_func <- function(var_name, outcome_num, subgroup) {
  temp <- coef_r(lm_object = 
                   lm(formula(paste0(a_main$formula1[outcome_num], "~", 
                                     a_main$formula2[outcome_num], "+", var_name, "+ ",
                                     a_main$formula2[outcome_num], ":", var_name)), data = CP))[4,]
  names(temp) <- c("coef", "se", "t", "p_value")
  N_temp <- CP[,a_main$formula2[outcome_num]]
  return(data.frame(subgroup = subgroup, var = a_main$var[outcome_num], 
                    comp = a_main$comp[outcome_num],
                    t(temp), N = length(N_temp[!is.na(N_temp)])))
}
#' Male vs. female
sg_gender <- do.call(rbind, lapply(1:nrow(a_main), subgroup_func, 
                                   var_name = "male", subgroup = "Male"))

#' Main Results for Southern, white versus not
table(CP$SouthernState, useNA = "always") # table respondents
CP$SW <- CP$SouthernState == 1 & CP$Race == 1 # Southern and White
# mean impute
CP$SW[is.na(CP$SW)] <- round(mean(CP$SW, na.rm = TRUE))
table(CP$SW, useNA = "always")
# store results
sg_southern <- do.call(rbind, lapply(1:nrow(a_main), subgroup_func, 
                                     var_name = "SW", subgroup = "Southern White"))

#' Democrat
CP$republican <- ifelse(CP$PolParty == 4, TRUE, FALSE)
sg_republican <- do.call(rbind, lapply(1:nrow(a_main), subgroup_func, 
                                       var_name = "republican", subgroup = "Republican"))

#' Political ideology
CP$conservative <- ifelse(CP$Pol7scale >= 5, TRUE, FALSE)
sg_conservative <- do.call(rbind, lapply(1:nrow(a_main), subgroup_func, 
                                         var_name = "conservative", subgroup = "Conservative"))

#' Military assertativeness
# mean impute
CP$MilAssert[is.na(CP$MilAssert)] <- mean(CP$MilAssert, na.rm = TRUE)
sg_milassert <- do.call(rbind, lapply(1:nrow(a_main), subgroup_func, 
                                      var_name = "MilAssert", subgroup = "Military Assertiveness"))
#' Make a coefficient plot
sg <- rbind(sg_gender, sg_southern, sg_republican, sg_conservative, sg_milassert)
sg$pval <- paste0("p = ", sprintf("%.3f", round(sg$p_value, 3)))
sg$var <- factor(sg$var, levels = c("Levels of Escalation", "Economic Costs",
                                    "Military Deaths", "Risk of War"))

#' Figure 23: Subgroup Analysis of Main Outcome Effects.
ggplot(data = sg, 
       aes(x=comp, y=coef,color=comp,shape=comp)) +    
  geom_hline(yintercept = 0, colour = gray(0.5), lty = 2) +
  geom_linerange(aes(ymin = qnorm(0.05)*se+coef, 
                     ymax = qnorm(0.95)*se+coef),
                 lwd = 1.5, size=5, position = position_dodge(0.5)) +
  geom_linerange(aes( ymin = qnorm(0.025)*se+coef, 
                      ymax = qnorm(0.975)*se+coef),
                 lwd = 1, size=5, position = position_dodge(0.5)) +
  geom_text(aes(x=comp, y=coef, 
                label=paste0(pval, "; N = ", N)),
            hjust=0.5, vjust=-0.9,size=4,color="black") +
  geom_point(fill = "white", size = 3) + 
  scale_colour_manual(values = c("red", "darkgreen", "blue")) + 
  scale_shape_manual(values = 21:23) +
  xlab("Comparisons") + ylab("Estimates of Interaction Effects") + 
  coord_flip() + theme_bw() +
  theme(legend.position="none") +
  theme(panel.grid.minor.x= element_line(colour=gray(0.9)),
        panel.grid.major.x= element_line(colour=gray(0.9))) + 
  theme(panel.grid.major.y = element_blank()) + 
  theme(axis.ticks.y = element_blank()) +
  theme(axis.text.y = element_text(face="bold")) +
  theme(plot.title=element_text(face="bold")) +
  theme(strip.text.x = element_text(face = "bold")) +
  facet_grid(subgroup ~ var)
ggsave(filename = paste0(images_dir, "interaction_effects_coef_plot.pdf"), width = 13, height = 8.5)

#' Military capability treatment 

# make density plots
CP$MilCapT_l <- ifelse(CP$MilCapT == 1, "US Inferior", "US Superior")
# Levels of Escalation
d_LevelEsc <- ggplot(data = CP, aes(x = LevelEsc, fill = MilCapT_l)) +
  geom_density(alpha = 0.3, adjust = 2.5) +
  geom_vline(xintercept = mean(CP$LevelEsc[CP$MilCapT_l == "US Inferior"]),
             linetype = "longdash", color = "violetred") +
  geom_vline(xintercept = mean(CP$LevelEsc[CP$MilCapT_l == "US Superior"]),
             linetype = "longdash", color = "deepskyblue4") +
  xlab("Response") + ylab("Density") +
  scale_color_manual(values = c("violetred", 
                                "deepskyblue"), name = "Comparison") +
  scale_fill_manual(values = c("violetred", 
                               "deepskyblue"), name = "Comparison") +
  theme_bw() + ggtitle("Levels of Escalation") + theme(legend.position = "bottom")
# Economic Costs
d_EcCosts <- ggplot(data = CP, aes(x = EcCosts, fill = MilCapT_l)) +
  geom_density(alpha = 0.3, adjust = 1.75) +
  geom_vline(xintercept = mean(CP$EcCosts[CP$MilCapT_l == "US Inferior"]),
             linetype = "longdash", color = "violetred") +
  geom_vline(xintercept = mean(CP$EcCosts[CP$MilCapT_l == "US Superior"]),
             linetype = "longdash", color = "deepskyblue4") +
  xlab("Response") + ylab("Density") +
  scale_color_manual(values = c("violetred", 
                                "deepskyblue"), name = "Comparison") +
  scale_fill_manual(values = c("violetred", 
                               "deepskyblue"), name = "Comparison") +
  theme_bw() + ggtitle("Economic Costs") + theme(legend.position = "bottom")
# Military Deaths
d_MilDeaths <- ggplot(data = CP, aes(x = MilDeaths, fill = MilCapT_l)) +
  geom_density(alpha = 0.3, adjust = 2.5) +
  geom_vline(xintercept = mean(CP$MilDeaths[CP$MilCapT_l == "US Inferior"]),
             linetype = "longdash", color = "violetred") +
  geom_vline(xintercept = mean(CP$MilDeaths[CP$MilCapT_l == "US Superior"]),
             linetype = "longdash", color = "deepskyblue4") +
  xlab("Response") + ylab("Density") +
  scale_color_manual(values = c("violetred", 
                                "deepskyblue"), name = "Comparison") +
  scale_fill_manual(values = c("violetred", 
                               "deepskyblue"), name = "Comparison") +
  theme_bw() + ggtitle("Military Deaths") + theme(legend.position = "bottom")
# Risk of War
d_RiskWar <- ggplot(data = CP, aes(x = RiskWar, fill = MilCapT_l)) +
  geom_density(alpha = 0.3, adjust = 2.5) +
  geom_vline(xintercept = mean(CP$RiskWar[CP$MilCapT_l == "US Inferior"]),
             linetype = "longdash", color = "violetred") +
  geom_vline(xintercept = mean(CP$RiskWar[CP$MilCapT_l == "US Superior"]),
             linetype = "longdash", color = "deepskyblue4") +
  xlab("Response") + ylab("Density") +
  scale_color_manual(values = c("violetred", 
                                "deepskyblue"), name = "Comparison") +
  scale_fill_manual(values = c("violetred", 
                               "deepskyblue"), name = "Comparison") +
  theme_bw() + ggtitle("Risk of War") + theme(legend.position = "bottom")
plot.new()
pdf(file = paste0(images_dir, "density_military_capability.pdf"), 
    width = 8, height = 6)
multiplot(d_LevelEsc, d_MilDeaths, d_EcCosts, d_RiskWar, cols = 2)
dev.off()

# make coefficient plot
res_mil <- res[res$group == "military",]
res_mil$var <- factor(as.character(res_mil$var), 
                      levels = unique(res_mil$var)[c(2,1,3,4)])
make_plots(res = res, group = "military", ncol_num = 1, color_val = c("black"))
ggsave(paste0(images_dir, "military_capability_coef.pdf"), width = 7, height = 4)



#' #### Data quality tests
#' **Check survey times:** indicate proportion of responses completed with a reasonable time (3 to 30 minutes). Note: survey time is measured in seconds.
#' 
# simulate and plot a survey time variable
#pdf(file="Pre-Analysis/SurveyTime.pdf",width = 12, height = 3.4)
CP$SurveyTimeSim <- rlnorm(nrow(CP), 10*.60, 1)
plot(density(CP$SurveyTimeSim), main="Time taken to complete survey (seconds)")
abline(v=180, col="red")
abline(v=1800, col="red")
#dev.off()

#' Assess whether **measures of data quality** interact with treatment and affect survey responses. Data quality measures are centered in order to appear on the same scale. 

data.quality.LevelEsc <- lm(LevelEsc~ scale(SurveyTimeSim)*scale(AttackControl) + scale(FailedCatchTri)*scale(AttackControl) + scale(OnlineDisc)*scale(AttackControl) + scale(SimSurvey)*scale(AttackControl),data=CP)
coefplot(data.quality.LevelEsc)

#' Test for **ordering effects**: assess whether the ordering of answer-options affects the answer to the question. 
#' 
#' For ordered variables, we test whether a descending answer-option order increases the level of the variable. Variables with the suffix ".q" are order indicator variables. These indicators take the value "$[$variable name$]\_$d" for descending order or "$[$variable name$]\_$a" for an ascending order. 
#' 
#' H: a descending order (as opposed to ascending) increases the level of the variable.
OT1 <- coef_r(lm(LevelEsc~LevelEsc.q, data=CP))
OT2 <- coef_r(lm(EcCosts~EcCosts.q, data=CP))
OT3 <- coef_r(lm(MilDeaths~MilDeaths.q, data=CP))
OT4 <- coef_r(lm(RiskWar~RiskWar.q, data=CP))
OT5 <- coef_r(lm(MagStake~MagStakes.q, data=CP))
OT6 <- coef_r(lm(MatCosts~MatCost.q, data=CP))
OT7 <- coef_r(lm(MilCap~MilCap.q, data=CP))
OT8 <- coef_r(lm(MilInt~MilInt.q,data=CP))
OT9 <- coef_r(lm(MagRepCosts~RepCost.q,data=CP))

#' For the coercive dummy variable, we test whether the option "avoid force" being presented first (coercive.q=1) decreases the likelihood the incident is perceived as coercive (Coercive=1)
OT10 <- coef_r(glm(Coercive~Coercive.q,data=CP,family="binomial"))

#' We also test whether a resolve question being presented first increases the level of that resolve variable.
OT11 <- coef_r(lm(EcCosts~EcCosts.q1, data=CP))
OT12 <- coef_r(lm(MilDeaths~MilDeaths.q1, data=CP))
OT13 <- coef_r(lm(RiskWar~RiskWar.q1, data=CP))

#' For unordered variables, we test whether an answer-option being presented first in the list makes that answer more likely to be chosen. Variables with the suffix .q are order indicator variables, where the value of the variable is the option presented first in the list. To test for ordering effects here, we use logistic regression with dummy indicator and dependent variables.
#' 
#' H: an answer-option being presented first increases the probability that that answer is chosen.
#' 
# simulate Gender dummy variables that are dependent on order dummy variables
e <- rnorm(n = nrow(CP), mean = 0, sd = 1)
Gender.q.f <-sample(c(0,1), size = nrow(CP), replace=T) # indicator variable is 1 if "female" answer-option presented first, 0 OW
Female <- 1 + 3*Gender.q.f + e
pr <- 1/(1+exp(-Female))
Female <- rbinom(nrow(CP), 1, pr) # dv is 1 if response is "female", 0 OW

# check that logit regression picks up the ordering effect in the simulated variable...
summary(glm(Female~Gender.q.f,family="binomial"))

# ...and that the procedure does not pick up an ordering effect in the test data (which is randomly generated).
n <- nrow(CP)
CP$Female <-rep(0,n)
CP$Female[CP$Gender==1] <- 1
table(CP$Female)
CP$Gender.q.f <-rep(0,n)
CP$Gender.q.f[CP$Gender.q==1] <- 1
table(CP$Gender.q.f)
summary(glm(Female~Gender.q.f,data=CP,family="binomial"))

CP$Female <-rep(0,n)
CP$Female[CP$Gender==1] <- 1
table(CP$Female)
CP$Gender.q.f <-rep(0,n)
CP$Gender.q.f[CP$Gender.q==1] <- 1
table(CP$Gender.q.f)
coef_r(glm(Female~Gender.q.f,data=CP,family="binomial"))

# The same procedure will be used for the categorical background variable Race.

#' Assess whether the randomly-assigned reputation term affects the level of reputation stakes.
#' 
#' H: there is a relationship between the reputation term presented and the magnitude of stakes.
OT14 <- coef_r(lm(MagStake~RepStake.q, data=CP))

#' Assess whether the order in which the 6 considerations are presented affects their level of importance. The order indicator variables here are dummies indicating whether the variable was presented first in the list.
#' 
#' H: a consideration being presented first increases its importance.
OT15 <- coef_r(lm(ImpCosts~ImpCost.q,data=CP))
OT16 <- coef_r(lm(ImpStakes~ImpStakes.q,data=CP))
OT17 <- coef_r(lm(ImpThreat~ImpThreat.q,data=CP))
OT18 <- coef_r(lm(ImpReputation~ImpReputation.q,data=CP))
OT19 <- coef_r(lm(ImpHonor~ImpHonor.q,data=CP))
OT20 <- coef_r(lm(ImpRevenge~ImpRevenge.q,data=CP))


# Subset the main variables. 

mainvars<- c("LevelEsc","EcCosts","RiskWar","MilDeaths","RepStake","MagStake","MatCosts","Coercion","MilCap","MilInt","RepCosts","MagRepCosts","ImpCosts","ImpStakes","ImpThreat","ImpReputation","ImpHonor","ImpRevenge","Gender","Age","Education","Race","USstate","Income","Pol7scale","PolParty","StrDem","StrRep","DemRep","MilExp","MilAssert_1","MilAssert_2","MilAssert_3","ReadForei","OnlineDisc","SimSurvey","CatchTri")

CP <- CP[mainvars]
CP[CP==-98]<-NA
CP[CP==-99]<-NA
CP[CP==-88]<-NA

# Survey Experiment Descriptive Statistics

#'## Descriptive statistics for outcome variables:
depvars <- c("LevelEsc","EcCosts","RiskWar","MilDeaths")

print(xtable(cbind(
  "n"=apply(!is.na(CP[depvars]),2,sum), 
  "min"=sapply(CP[depvars],min,na.rm=TRUE),
  "max"=sapply(CP[depvars],max,na.rm=TRUE),
  "mean"=sapply(CP[depvars],mean,na.rm=TRUE),
  "sd"=sapply(CP[depvars],sd,na.rm=TRUE)),
  row.names=colnames(CP[depvars]),digits=c(0,0,0,0,2,2)),comment=FALSE)

CPCB<-data.frame(tidyr::gather(CP[depvars]))

theme_set(theme_minimal())

ggplot(CPCB,aes(value)) +                     
  facet_wrap(~ key, scales = "free",drop=F) +
  geom_bar() + 
  theme(axis.title.x = element_blank()) 
#' Figure 24: Distribution of Responses: Outcome measures
ggsave(filename = paste0(images_dir, "Depvars_desc.pdf"),
       width = 10, height = 6)

#'## Descriptive statistics for mediating variables:

medvars <- c("RepStake","MagStake","MatCosts","Coercion","MilCap","MilInt","RepCosts","MagRepCosts","ImpCosts","ImpStakes","ImpThreat","ImpReputation","ImpHonor","ImpRevenge")

print(xtable(cbind(
  "n"=apply(!is.na(CP[medvars]),2,sum), 
  "min"=sapply(CP[medvars],min,na.rm=TRUE),
  "max"=sapply(CP[medvars],max,na.rm=TRUE),
  "mean"=sapply(CP[medvars],mean,na.rm=TRUE),
  "sd"=sapply(CP[medvars],sd,na.rm=TRUE)),
  row.names=colnames(CP[medvars]),digits=c(0,0,0,0,2,2)),comment=FALSE)

CPCB<-data.frame(tidyr::gather(CP[medvars]))
ggplot(CPCB,aes(value)) +                     
  facet_wrap(~ key, scales = "free",drop=F) +
  geom_bar() + 
  theme(axis.title.x = element_blank())
#' Figure 25: Distribution of Responses: Mediator measures
ggsave(filename = paste0(images_dir, "Medvars_desc.pdf"),
       width = 10, height = 8)

#'## Descriptive statistics for covariates:

covars <- c("Gender","Age","Education","Race","Income","Pol7scale","PolParty","StrDem","StrRep","DemRep","MilExp","MilAssert_1","MilAssert_2","MilAssert_3")

print(xtable(cbind(
  "n"=apply(!is.na(CP[covars]),2,sum), 
  "min"=sapply(CP[covars],min,na.rm=TRUE),
  "max"=sapply(CP[covars],max,na.rm=TRUE),
  "mean"=sapply(CP[covars],mean,na.rm=TRUE),
  "sd"=sapply(CP[covars],sd,na.rm=TRUE)),
  row.names=colnames(CP[covars]),digits=c(0,0,0,0,2,2)),comment=FALSE)

CPCB <- data.frame(tidyr::gather(CP[covars]))
ggplot(CPCB, aes(value)) +                     
  facet_wrap(~ key, scales = "free") +
  geom_bar() + 
  theme(axis.title.x = element_blank())
#' Figure 26: Distribution of Responses: Covariate measures
ggsave(paste0(images_dir, "Covars_desc.pdf"),
       width = 10, height = 8)

ggplot(CP,aes(USstate)) + geom_bar() + coord_flip()
#' Figure 27: Distribution of Responses: US State
ggsave(paste0(images_dir, "USstate_desc.pdf"),width = 10, height = 8)

# end log
sink()


